home *** CD-ROM | disk | FTP | other *** search
- /*
- cookietool is (c) 1995-2000 by Wilhelm Noeker (wnoeker@t-online.de)
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA
-
- */
-
-
- /*========================================================================*\
- | File: cookietool.c Date: 25 Oct 1997 |
- *------------------------------------------------------------------------*
- | Remove duplicate entries from a cookie file, |
- | various options for sorting the output. |
- | Expected file format is plain text with a "%%" line ending each cookie.|
- | See help() for usage notes. |
- | |
- \*========================================================================*/
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include "strstuff.h"
-
/* Cookie delimiter: an input line beginning with this string ends a cookie. */
#define EOC "%%"

/* Program name, version number and release date in one string. */
char version[] = "$VER: cookietool 2.3 (17.07.2000)";
-
-
- struct cookie
- {
- UBYTE *text;
- UBYTE *sorthook;
- long size;
- long number;
- };
-
- struct cookie *clist;
-
- long listsize = 1000; /* will be adjusted dynamically */
- long listed = 0;
-
- #define FBUFSIZE 16384 /* we'll use larger file buffers */
- #define CBUFSIZE 32000L
- #define LBUFSIZE 2000
- UBYTE cbuf[ CBUFSIZE ]; /* large enough to hold one complete cookie */
- UBYTE line[ LBUFSIZE ]; /* large enough to hold the longest line */
-
-
-
- /*
- * Print a help text and nag about illegal parameter <s>
- */
- void help( UBYTE *s )
- {
- if( s )
- printf( "illegal option '%s'\n", s );
- printf( "usage: cookietool [ options ] <database> \n" );
- printf( "where options are:\n" );
- printf( " -p passive, don't delete anything\n" );
- printf( " -a treat 'abbreviations' as doubles (i.e. delete them, too)\n" );
- printf( " -s sort cookies\n" );
- printf( " -sl \" , looking at the last line only\n" );
- printf( " -sw \" , looking at the last word only\n" );
- printf( " -s<sep> \" , starting after the last <sep>, e.g. '-s--'\n" );
- printf( " -ss \" , by size\n" );
- printf( " -d[ 0-3 ] how fussy about word delimiters? (default: 2)\n" );
- printf( " -c case sensitive comparisons\n" );
- printf( " -o overwrite directly, no tempfile (caution!)\n" );
- }
-
-
-
- int cookie_cmp( struct cookie *a, struct cookie *b, int mode )
- {
- int c = 0;
-
- switch( mode )
- {
- case 0: /* by number */
- c = a->number - b->number;
- break;
- case 1: case -1: /* by name, ascending/descending */
- c = str_cmp( a->sorthook, b->sorthook );
- if( c == 0 ) /* when in doubt, the number decides */
- c = a->number - b->number;
- c *= mode;
- break;
- case 2: /* by size */
- c = a->size - b->size;
- break;
- }
- return c;
- }
-
-
-
- /*
- * sift: centre routine to heapsort()
- */
- void sift( struct cookie v[], long i, long m, int mode )
- {
- long j;
- struct cookie temp;
-
- while( (j = 2 * (i + 1) - 1) <= m )
- {
- if( j < m && cookie_cmp( &v[ j ], &v[ j + 1 ], mode ) < 0 )
- j++;
- if( cookie_cmp( &v[ i ], &v[ j ], mode ) < 0 )
- {
- temp = v[ i ];
- v[ i ] = v[ j ];
- v[ j ] = temp;
- i = j;
- }
- else
- i = m; /* done */
- }
- }
-
-
- void my_heapsort( struct cookie v[], long n, int mode )
- {
- long i;
- struct cookie temp;
-
- if( n < 2 ) /* no sorting necessary */
- return;
- for( i = n/2 - 1; i >= 0; i-- )
- sift( v, i, n - 1, mode );
- for( i = n - 1; i >= 1; i-- )
- {
- temp = v[ 0 ];
- v[ 0 ] = v[ i ];
- v[ i ] = temp;
- sift( v, 0, i - 1, mode );
- }
- }
-
-
- UBYTE hooktarget[ 16 ];
-
- void set_hooks( int mode )
- /* adjust sorthooks for the final sort, according to the desired mode */
- {
- long l;
- int hot;
- UBYTE *s;
-
- printf( "Adjusting sort hooks" );
- fflush( stdout );
- for( l = 0; l < listed; l++ )
- {
- s = clist[ l ].text;
- switch( mode )
- {
- case 3: /* start of last line */
- hot = 1;
- while( *s )
- {
- if( *s == '\n' )
- hot = 1;
- else if( hot )
- {
- clist[ l ].sorthook = s;
- hot = 0;
- }
- s++;
- }
- break;
- case 4: /* start of last word */
- hot = 1;
- while( *s )
- {
- if( isspace( *s ) )
- hot = 1;
- else if( hot )
- {
- clist[ l ].sorthook = s;
- hot = 0;
- }
- s++;
- }
- break;
- case 5: /* at last occurence of <hooktarget> */
- while( s )
- {
- clist[ l ].sorthook = s++;
- s = strstr( s, hooktarget );
- }
- break;
- default:
- }
- }
- printf( ", done.\n" );
- }
-
-
-
- /*
- * Delete cookies and log them to a file
- */
- void one_cookie( int doubles, int abbrevs, int finalsort, FILE *fp )
- {
- long i, j, dbl = 0, abr = 0;
- int c;
-
- if( doubles )
- {
- printf( "Removing double entries" );
- if( abbrevs )
- printf( " + 'abbreviations'" );
- fflush( stdout );
- my_heapsort( clist, listed, -1 ); /* sort descending by string */
- for( i = listed - 1; i > 0; i = j )
- {
- for( j = i - 1; j >= 0
- && ( (c = str_cmp( clist[ j ].text, clist[ i ].text )) == 0
- || (abbrevs && c == STR_SHORTER) ); j-- )
- {
- if( fp )
- if( fprintf( fp, "%s\n%s\n", clist[ j ].text, EOC ) <= 0 )
- {
- printf( "\nFile error, aborted !!!\n" );
- exit( 20 );
- }
- free( clist[ j ].text );
- clist[ j ] = clist[ --listed ];
- if( c == 0 )
- dbl++;
- else
- abr++;
- }
- }
- printf( ", done. (%ld + %ld found)\n", dbl, abr );
- }
- if( finalsort > 0 )
- {
- if( finalsort > 2 )
- set_hooks( finalsort );
- printf( "Sorting" );
- fflush( stdout );
- if( finalsort == 2 )
- my_heapsort( clist, listed, 2 ); /* sort by size */
- else
- my_heapsort( clist, listed, 1 ); /* sort ascending by string */
- }
- else
- {
- printf( "Restoring order" );
- fflush( stdout );
- my_heapsort( clist, listed, 0 ); /* sort by number */
- }
- printf( ", done.\n" );
- }
-
-
- void read_cookies( FILE *fp )
- {
- long cbuflen, ignored = 0;
- int lines;
-
- printf( "Reading cookies" );
- fflush( stdout );
- strcpy( cbuf, "" );
- lines = 0;
- cbuflen = 0;
- while( fgets( line, LBUFSIZE, fp ) )
- {
- if( strncmp( line, EOC, strlen( EOC ) ) == 0 )
- { /* "end of cookie"-marker */
- if( lines > 0 )
- { /* store the cookie */
- /* but drop the last LF, to avoid trouble in recognizing abbrev's: */
- cbuflen = strlen( cbuf );
- if( cbuf[ cbuflen - 1 ] == '\n' )
- cbuf[ --cbuflen ] = '\0';
- clist[ listed ].text = malloc( cbuflen + 1 ); /* mind the '\0'! */
- if( clist[ listed ].text != NULL )
- {
- clist[ listed ].number = listed + ignored;
- clist[ listed ].size = cbuflen;
- strcpy( clist[ listed ].text, cbuf );
- clist[ listed ].sorthook = clist[ listed ].text;
- }
- else
- {
- printf( "\nOut of memory\n" );
- exit( 20 );
- }
- if( ++listed == listsize )
- {
- listsize = 3 * listsize / 2;
- clist = realloc( clist, listsize * sizeof( struct cookie ) );
- if( !clist )
- {
- printf( "\nList reallocation failed\n" );
- exit( 20 );
- }
- }
- }
- else
- ignored++; /* or ignore it */
- /* start a new one */
- strcpy( cbuf, "" );
- lines = 0;
- cbuflen = 0;
- }
- else
- {
- if( (cbuflen += strlen( line ) ) >= CBUFSIZE )
- {
- printf( "\nCookie too big( >%ld chars )\n", CBUFSIZE );
- exit( 20 );
- }
- strcat( cbuf, line );
- lines++;
- }
- }
- printf( ", done. (%ld read, %ld empty)\n", listed, ignored );
- }
-
-
- /*
- * Write cookies to file,
- * also frees the allocated memory!
- */
- void write_cookies( FILE *fp )
- {
- long l;
-
- printf( "Writing cookies" );
- fflush( stdout );
- for( l = 0; l < listed; l++ )
- {
- if( fprintf( fp, "%s\n%s\n", clist[ l ].text, EOC ) <= 0 )
- {
- printf( "\nFile error, aborted !!!\n" );
- exit( 20 );
- }
- free( clist[ l ].text );
- }
- printf( ", done. (%ld written)\n", listed );
- }
-
-
- int main( int argc, char *argv[] )
- {
- UBYTE *s;
- int dirty = 0, passive = 0, abbrevs = 0, finalsort = 0;
- int case_sense = 0, bordermode = 2;
- UBYTE name1[ 100 ], name2[ 100 ], name3[ 100 ];
- FILE *infile, *outfile, *logfile;
-
- name1[ 0 ] = name2[ 0 ] = name3[ 0 ] = '\0';
- if( argc < 2 )
- {
- help( NULL );
- return 5;
- }
- while( --argc )
- {
- s = *++argv;
- if( *s != '-' )
- {
- if( name1[ 0 ] == '\0' )
- strcpy( name1, s );
- else
- strcpy( name3, s );
- }
- else
- {
- switch( *++s )
- {
- case 's':
- switch( *++s )
- {
- case '\0':
- finalsort = 1;
- break;
- case 's':
- finalsort = 2;
- break;
- case 'l':
- finalsort = 3;
- break;
- case 'w':
- finalsort = 4;
- break;
- default:
- if( ispunct( *s ) )
- {
- finalsort = 5;
- strncpy( hooktarget, s, 15 );
- }
- else
- {
- help( argv[ 0 ] );
- return 5;
- }
- }
- break;
- case 'd':
- if( isdigit( *++s ) )
- bordermode = atoi( s );
- else
- {
- help( argv[ 0 ] );
- return 5;
- }
- break;
- case 'c':
- case_sense = 1;
- break;
- case 'a':
- abbrevs = 1;
- break;
- case 'p':
- passive = 1;
- break;
- case 'o':
- dirty = 1;
- break;
- default:
- help( argv[ 0 ] );
- return 5;
- }
- }
- }
- /* important, before calling anything from strstuff: */
- str_setup( bordermode, case_sense );
- if( name1[ 0 ] == '\0' )
- {
- help( NULL );
- return 5;
- }
- if( dirty )
- {
- strcpy( name2, name1 );
- printf( "Warning! You have enabled direct writeback mode!\n" );
- printf( "\e[2mDon't break (or crash) cookietool now, " );
- printf( "or you will inevitably lose data!\e[0m\n" );
- }
- else
- strcpy( name2, "ct_temp_crunchfile" );
- printf( "CookieTool " );
- print_strstat();
- clist = malloc( listsize * sizeof( struct cookie ) );
- if( !clist )
- {
- printf( "List allocation failed\n" );
- return 20;
- }
- if( !(infile = fopen( name1, "r" ) ) )
- {
- printf( "Can't open %s for input!\n", name1 );
- return 10;
- }
- setvbuf( infile, NULL, _IOFBF, FBUFSIZE );
- if( name3[ 0 ] != '\0' )
- {
- if( !(logfile = fopen( name3, "w" ) ) )
- {
- printf( "Can't open %s for output!\n", name3 );
- return 10;
- }
- }
- else
- logfile = NULL;
- read_cookies( infile );
- fclose( infile );
- one_cookie( !passive, abbrevs, finalsort, logfile );
- if( logfile )
- fclose( logfile );
- if( !(outfile = fopen( name2, "w" ) ) )
- {
- printf( "Can't open %s for output!\n", name2 );
- return 10;
- }
- setvbuf( outfile, NULL, _IOFBF, FBUFSIZE );
- write_cookies( outfile );
- fclose( outfile );
- free( clist );
- if( !dirty )
- { /* replace the input file */
- if( remove( name1 ) != 0 || rename( name2, name1 ) != 0 )
- {
- printf( "Couldn't overwrite the input file! Your cookies are in '%s'.\n", name2 );
- return 5;
- }
- }
- return 0;
- }
-
-